import scrapy
from ..items import SummaryItem


class SummarySpider(scrapy.Spider):
    """Scrape the per-position summary pages on www.apta.net.cn.

    One request is issued for every entry in ``job_code``; each response is
    turned into a single ``SummaryItem`` by :meth:`parse`.
    """

    name = "summary"
    allowed_domains = ["www.apta.net.cn"]
    # Codes appended to the ``pcode`` query parameter, one request per code.
    job_code = [
        '3000011',
        '3000097',
        '3000185',
        '3000144',
        '3000194',
        '3000205',
        '3000207',
        '3000422',
        '3000811',
        '3000758',
        '3000841',
        '3000983',
        '0605001',
        '0605031',
        '0605024',
        '0607050',
        '0606005',
        '0606006',
        '0609007',
        '0609008',
        '0604026',
        '0609011',
        '0607061',
        '0608028',
        '0604031',
        '0604042',
        '0603002',
        '0608032',
        '0608034',
        '0601004',
        '0601088',
        '0603011',
        '0601008',
        '0601081',
        '0601084',
        '0608069',
    ]
    # The URL prefix is kept as a literal inside the comprehension on purpose:
    # in a class body only the outermost iterable (``job_code``) is resolved
    # in class scope, so another class attribute could not be referenced here.
    start_urls = ["http://www.apta.net.cn/Officer/Summary?examid=329&&pcode=" + x for x in job_code]

    def parse(self, response):
        """Build one ``SummaryItem`` from a summary page.

        Args:
            response: Scrapy response for one of ``start_urls``.

        Yields:
            SummaryItem: populated with ``job_code``, ``manager_name``,
            ``plan_department``, the four integer counters ``plan_num``,
            ``submit_num``, ``qualified_num``, ``payment_num`` and the
            reformatted ``update_time``.

        Raises:
            IndexError: if the update-time header or the expected seven
                text nodes of the data row are not present in the page.
            ValueError: if one of the counter cells is not an integer.
        """
        # Every node of interest lives under this nested table.
        table = "/html/body/table/tr/td/table/tr[2]/td/table"

        header = response.xpath(table + "/tr[1]/td/span[2]").xpath('text()').extract()
        if not header:
            raise IndexError(
                "update-time header not found on %s" % response.url)
        update_times = header[0].strip()

        # Extract the text nodes of the 4th row once instead of re-running
        # the same XPath for every field. The list is the flattened text
        # nodes of all <td> children, so indices refer to text nodes.
        cells = response.xpath(table + "/tr[4]").xpath("td").xpath("text()").extract()
        if len(cells) < 7:
            raise IndexError(
                "expected at least 7 text nodes in summary row, got %d on %s"
                % (len(cells), response.url))
        cells = [cell.strip() for cell in cells[:7]]

        item = SummaryItem()
        item['job_code'] = cells[0].replace('·', '')
        item['manager_name'] = cells[1]
        item['plan_department'] = cells[2]
        item['plan_num'] = int(cells[3])
        item['submit_num'] = int(cells[4])
        item['qualified_num'] = int(cells[5])
        item['payment_num'] = int(cells[6])
        # Characters 6..15 of the header are taken as the timestamp and the
        # month/day/hour markers are turned into dash separators.
        # NOTE(review): presumably the header reads like
        # "<6-char prefix>MM月DD日 HH时..." -- confirm against a live page.
        item['update_time'] = update_times[6:16].replace('月', '-').replace('日 ', '-').replace('时', '')
        yield item
